from requests.exceptions import RequestException
import requests
import json
from pyquery import PyQuery as pq
import time

# Fetch the HTML of the page to crawl.
def getPage(url):
    """Return the response body for *url*, or None on any failure.

    A desktop Chrome User-Agent is sent so the site serves the normal
    page instead of blocking the default requests UA.  A timeout is set
    so a stalled connection cannot hang the crawler; requests.Timeout is
    a subclass of RequestException, so it is caught below as well.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"
    }
    try:
        res = requests.get(url, headers=headers, timeout=10)
        if res.status_code == 200:
            return res.text
        return None
    except RequestException:
        return None

# Parse the crawled page and yield one record per listing.
def parsePage(content):
    """Extract rental listings from a 58.com results page.

    Yields a dict per "li.house-cell" element with the listing title,
    image URL (taken from the lazy-load attribute), room layout and price.
    """
    document = pq(content)
    for cell in document("li.house-cell").items():
        record = {
            'index': cell.find("div.des a.strongbox").text(),
            'image': cell.find("img").attr('lazy_src'),
            'apartment': cell.find("div.des p.room").text(),
            'price': cell.find("div.money").text(),
        }
        yield record

# Append one parsed record to the result file as a JSON line.
def writeFile(content, path="./result.txt"):
    """Serialize *content* as one JSON line and append it to *path*.

    ensure_ascii=False keeps the Chinese text readable in the file.
    *path* defaults to the original hard-coded "./result.txt" so existing
    callers are unaffected.
    """
    with open(path, "a", encoding="utf-8") as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')

# Scheduler: fetch one results page and persist every parsed listing.
def main(offset):
    """Crawl results page number *offset* and append its listings to disk."""
    page_url = "https://sz.58.com/chuzu/pn" + str(offset)
    page = getPage(page_url)
    if not page:
        return
    for listing in parsePage(page):
        print(listing)
        writeFile(listing)

# Script entry point: crawl pages 0-69, pausing between requests
# to avoid hammering the server.
if __name__ == '__main__':
    page_number = 0
    while page_number < 70:
        main(offset=page_number)
        time.sleep(3)
        page_number += 1

